{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "630a2437"
   },
   "source": [
    "# Getting Started Tutorial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3c3dfc08"
   },
   "source": [
    "To install Evidently using the pip package manager, run:\n",
    "\n",
    "```$ pip install evidently```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 165075,
     "status": "ok",
     "timestamp": 1670449882138,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "aec9a1cc",
    "outputId": "3935b908-73f5-436f-9fca-af065e330a75"
   },
   "outputs": [],
   "source": [
    "try:\n",
    "    import evidently\n",
    "except:\n",
    "    !pip install git+https://github.com/evidentlyai/evidently.git"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 2869,
     "status": "ok",
     "timestamp": 1670449884998,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "0c00061b"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "\n",
    "from evidently.report import Report\n",
    "from evidently.metrics.base_metric import generate_column_metrics\n",
    "from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset\n",
    "from evidently.metrics import *\n",
    "\n",
    "from evidently.test_suite import TestSuite\n",
    "from evidently.tests.base_test import generate_column_tests\n",
    "from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset\n",
    "from evidently.tests import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 6,
     "status": "ok",
     "timestamp": 1670449884998,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "56d3e494"
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1dca5a2c"
   },
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 3515,
     "status": "ok",
     "timestamp": 1670449888508,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "5c9d9f9e"
   },
   "outputs": [],
   "source": [
    "data = fetch_california_housing(as_frame=True)\n",
    "housing_data = data.frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 8,
     "status": "ok",
     "timestamp": 1670449888509,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "60692ed0"
   },
   "outputs": [],
   "source": [
    "housing_data.rename(columns={'MedHouseVal': 'target'}, inplace=True)\n",
    "housing_data['prediction'] = housing_data['target'].values + np.random.normal(0, 5, housing_data.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 7,
     "status": "ok",
     "timestamp": 1670449888509,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "f48b2f20"
   },
   "outputs": [],
   "source": [
    "reference = housing_data.sample(n=5000, replace=False)\n",
    "current = housing_data.sample(n=5000, replace=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "fedb4612"
   },
   "source": [
    "## Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 0,
     "output_embedded_package_id": "1fhn3NeCqspz5iFigvZSIKQiNbXzdQC-d"
    },
    "executionInfo": {
     "elapsed": 8243,
     "status": "ok",
     "timestamp": 1670449896746,
     "user": {
      "displayName": "Emeli Dral",
      "userId": "07849725042886651837"
     },
     "user_tz": 0
    },
    "id": "bb77cbe1",
    "outputId": "ff3d9b4c-19ff-4f12-c2d3-45cafeb8354c"
   },
   "outputs": [],
   "source": [
    "report = Report(metrics=[\n",
    "    DataDriftPreset(), \n",
    "])\n",
    "\n",
    "report.run(reference_data=reference, current_data=current)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "output_embedded_package_id": "1bCO6AcggV2v5gw21oLWAGQo5RYeP_yOu"
    },
    "id": "7e7de377",
    "outputId": "445a5c02-06b6-4149-a983-b9f022b57dba"
   },
   "outputs": [],
   "source": [
    "report = Report(metrics=[\n",
    "    ColumnSummaryMetric(column_name='AveRooms'),\n",
    "    ColumnQuantileMetric(column_name='AveRooms', quantile=0.25),\n",
    "    ColumnDriftMetric(column_name='AveRooms'),\n",
    "    \n",
    "])\n",
    "\n",
    "report.run(reference_data=reference, current_data=current)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "output_embedded_package_id": "1mYzaITssvBlfDbpJZtSKEGmVxMtUsN0J"
    },
    "id": "9e95865e",
    "outputId": "962857b9-c1fb-4186-ad33-673d562ade54"
   },
   "outputs": [],
   "source": [
    "report = Report(metrics=[\n",
    "    generate_column_metrics(ColumnQuantileMetric, parameters={'quantile':0.25}, columns=['AveRooms', 'AveBedrms']),\n",
    "])\n",
    "\n",
    "report.run(reference_data=reference, current_data=current)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "output_embedded_package_id": "1JUEabeT61mW16eviYucIPleIfXVoYdc-"
    },
    "id": "fc3f8a8f",
    "outputId": "cc932a1c-8fe0-4ac0-a875-cc25aaa45ac2",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "report = Report(metrics=[\n",
    "    ColumnSummaryMetric(column_name='AveRooms'),\n",
    "    generate_column_metrics(ColumnQuantileMetric, parameters={'quantile':0.25}, columns='num'),\n",
    "    DataDriftPreset()\n",
    "])\n",
    "\n",
    "report.run(reference_data=reference, current_data=current)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#report.save_html('report.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true
    },
    "id": "d72ca314",
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "report.as_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true
    },
    "id": "ab320537"
   },
   "outputs": [],
   "source": [
    "report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true
    },
    "id": "fd4e9085"
   },
   "outputs": [],
   "source": [
    "#report.save_json('report.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1ea31ae7"
   },
   "source": [
    "## Test Suite "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "output_embedded_package_id": "1AJKCnZwYYl2cLoRWtKeqF7iIt6Ruuqz-"
    },
    "id": "f2311155",
    "outputId": "825e12fb-8ed6-475e-b046-189225e4cb51"
   },
   "outputs": [],
   "source": [
    "tests = TestSuite(tests=[\n",
    "    TestNumberOfColumnsWithMissingValues(),\n",
    "    TestNumberOfRowsWithMissingValues(),\n",
    "    TestNumberOfConstantColumns(),\n",
    "    TestNumberOfDuplicatedRows(),\n",
    "    TestNumberOfDuplicatedColumns(),\n",
    "    TestColumnsType(),\n",
    "    TestNumberOfDriftedColumns(),\n",
    "])\n",
    "\n",
    "tests.run(reference_data=reference, current_data=current)\n",
    "tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "output_embedded_package_id": "1ohVLBp0ivJltJprlA1F-AVL7TcppIziI"
    },
    "id": "d78779bb",
    "outputId": "c000d73a-dee7-42e4-b02f-fd08ea97af4a",
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "suite = TestSuite(tests=[\n",
    "    NoTargetPerformanceTestPreset(),\n",
    "])\n",
    "\n",
    "suite.run(reference_data=reference, current_data=current)\n",
    "suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "output_embedded_package_id": "11IMmTb-z3p9uT6Hlhj9DzmCEq_C9nn-6"
    },
    "id": "e92a0605",
    "outputId": "4aafbda5-e521-44d5-bda1-fb5b65faff12"
   },
   "outputs": [],
   "source": [
    "suite = TestSuite(tests=[\n",
    "    TestColumnDrift('Population'),\n",
    "    TestMeanInNSigmas('HouseAge'),\n",
    "    NoTargetPerformanceTestPreset(columns=['AveRooms', 'AveBedrms', 'AveOccup'])\n",
    "])\n",
    "\n",
    "suite.run(reference_data=reference, current_data=current)\n",
    "suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7df650c7"
   },
   "outputs": [],
   "source": [
    "suite = TestSuite(tests=[\n",
    "    TestNumberOfColumnsWithMissingValues(),\n",
    "    TestNumberOfRowsWithMissingValues(),\n",
    "    TestNumberOfConstantColumns(),\n",
    "    TestNumberOfDuplicatedRows(),\n",
    "    TestNumberOfDuplicatedColumns(),\n",
    "    TestColumnsType(),\n",
    "    TestNumberOfDriftedColumns(),\n",
    "    TestColumnDrift('Population'),\n",
    "    TestShareOfOutRangeValues('Population'),\n",
    "    DataStabilityTestPreset(),\n",
    "    RegressionTestPreset()\n",
    "    \n",
    "])\n",
    "\n",
    "suite.run(reference_data=reference, current_data=current)\n",
    "suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true
    },
    "id": "20da511b",
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "suite.as_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true
    },
    "id": "3c1c476f"
   },
   "outputs": [],
   "source": [
    "suite.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "e4225064"
   },
   "outputs": [],
   "source": [
    "#suite.save_html('test_suite.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5fedd579"
   },
   "outputs": [],
   "source": [
    "#suite.save_json('test_suite.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Support Evidently\n",
    "Did you find the example useful? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "",
   "version": ""
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
